#!/bin/bash
# Run a dedicated Spark Thrift Server instance named "lake" with its own
# PID and log directories, so several instances can coexist on one host.
instance="lake"
export SPARK_HOME="$(cd "$(dirname "$0")/.."; pwd)"
export SPARK_PID_DIR="${SPARK_HOME}/pid-${instance}"
export SPARK_LOG_DIR="${SPARK_HOME}/logs-${instance}"

CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"

# Submit the server to YARN in client mode as a managed daemon.
# Catalogs: spark_catalog wraps the Hive session catalog via
# SparkSessionCatalog; a named catalog such as "prod" must use SparkCatalog
# instead, and it needs an explicit type (assumed hive here, i.e. the same
# metastore as the session catalog).
exec "${SPARK_HOME}"/sbin/spark-daemon.sh submit "$CLASS" "$instance" \
  --name "Thrift ${instance}" \
  --hiveconf hive.server2.thrift.port=10199 \
  --hiveconf hive.default.fileformat=Orc \
  --hiveconf hive.server2.authentication=CUSTOM \
  --master yarn \
  --queue a17 \
  --deploy-mode client \
  --driver-cores 2 \
  --driver-memory 5G \
  --executor-cores 6 \
  --executor-memory 15G \
  --num-executors 3 \
  --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
  --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \
  --conf spark.sql.catalog.spark_catalog.type=hive \
  --conf spark.sql.catalog.prod=org.apache.iceberg.spark.SparkCatalog \
  --conf spark.sql.catalog.prod.type=hive \
  --conf spark.driver.maxResultSize=200MB \
  --conf spark.default.parallelism=500 \
  --conf spark.sql.shuffle.partitions=500 \
  --conf spark.sql.adaptive.enabled=true \
  --conf spark.scheduler.mode=FAIR \
  --conf spark.network.timeout=6000s \
  --conf spark.memory.fraction=0.8 \
  --conf spark.dynamicAllocation.shuffleTracking.enabled=true \
  --conf spark.dynamicAllocation.shuffleTracking.timeout=180s \
  --conf spark.dynamicAllocation.enabled=true \
  --conf spark.dynamicAllocation.minExecutors=3 \
  --conf spark.dynamicAllocation.maxExecutors=50 \
  --jars hdfs:///spark/jdbc/ojdbc6-11.2.0.3.jar,hdfs:///spark/jdbc/clickhouse-jdbc-0.4.6.jar,hdfs:///spark/jdbc/guava-31.0.1-jre.jar,hdfs:///spark/jdbc/iceberg-spark-runtime-3.5_2.12-1.6.0.jar
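With both catalogs registered, Iceberg tables resolve through either the session catalog or the standalone prod catalog, and the SQL extensions enable Iceberg's metadata tables and DDL. A hypothetical check against an illustrative table db.events (the table name is an assumption, not part of this setup):

# Hypothetical table db.events; any Iceberg table tracked by the Hive
# metastore can be addressed the same way through the prod catalog.
"${SPARK_HOME}"/bin/beeline -u "jdbc:hive2://localhost:10199" -n user -p secret \
  -e "SELECT snapshot_id, committed_at FROM prod.db.events.snapshots;"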